%% Train the DDPG Agent

% Instantiate the custom environment and query its observation and action
% specifications; both are handed to the agent constructor further down.
env = MyWorLD();

observationInfo = getObservationInfo(env);
actionInfo = getActionInfo(env);


% Optimizer settings for the actor (gradient threshold 1, learn rate 1e-4).
ActorOptimizerOptions = rlOptimizerOptions( ...
    GradientThreshold = 1, ...
    LearnRate = 1e-4);

% Optimizer settings for the critic; same values as the actor, kept as a
% separate options object.
CriticOptimizerOptions = rlOptimizerOptions( ...
    GradientThreshold = 1, ...
    LearnRate = 1e-4);


% DDPG hyperparameters. The two optimizer option objects built above are
% attached here, and the experience buffer is stored together with the agent
% whenever the agent is saved.
ddpgOptions = rlDDPGAgentOptions( ...
    SampleTime = 0.3, ...
    TargetSmoothFactor = 1e-3, ...
    TargetUpdateFrequency = 1, ...
    ExperienceBufferLength = 200000, ...
    DiscountFactor = 0.99, ...
    MiniBatchSize = 128, ...
    NumStepsToLookAhead = 1, ...
    CriticOptimizerOptions = CriticOptimizerOptions, ...
    ActorOptimizerOptions = ActorOptimizerOptions, ...
    SaveExperienceBufferWithAgent = true);


% Exploration noise: one standard deviation entry per action channel.
% NOTE(review): the 2-by-1 vector assumes a two-element action - confirm
% against actionInfo.
ddpgOptions.NoiseOptions.StandardDeviation = repmat(0.9, 2, 1);
ddpgOptions.NoiseOptions.StandardDeviationDecayRate = 1e-3;


% Build the agent from the specifications and options only (no explicit
% actor/critic networks are passed in).
Agent = rlDDPGAgent(observationInfo, actionInfo, ddpgOptions);



% Training configuration: up to 1000 episodes of at most 160 steps each,
% no early-stop criterion, live training-progress plot, and the agent is
% saved according to the AverageReward criterion with a value of -20.
trainOpts = rlTrainingOptions( ...
    MaxEpisodes = 1000, ...
    MaxStepsPerEpisode = 160, ...
    Verbose = false, ...
    ScoreAveragingWindowLength = 10, ...
    StopTrainingCriteria = 'None', ...
    Plots = 'training-progress', ...
    SaveAgentValue = -20, ...
    SaveAgentCriteria = "AverageReward");


% Run training; the returned statistics are kept in the workspace.
trainingStats = train(Agent, env, trainOpts);




%% Simulating the trained agent 


% Roll out the agent in the environment for at most 1000 steps.
simOptions = rlSimulationOptions('MaxSteps', 1000);
experience = sim(env, Agent, simOptions);


% Pull the logged reward and observation signals out of the experience.
% NOTE(review): the inner field name "Observation" presumably mirrors the
% Name of the observation specification - confirm against observationInfo.
rew = experience.Reward;
obs = experience.Observation.Observation;
timeVector = rew.Time;

% Reshape Observation data to 2D: keep dimension 1 (observation channel) and
% dimension 3 (sample index), dropping the singleton dimension in between.
obsData = reshape(obs.Data, size(obs.Data, 1), size(obs.Data, 3));


figure;
subplot(2, 1, 1);
plot(timeVector, rew.Data, '-o');
title('Reward over Time');
xlabel('Time (s)');
ylabel('Reward');

subplot(2, 1, 2);
% Plot against the observation's own time stamps instead of rebuilding them
% as [0; reward time], so the x-vector always matches the number of
% observation samples. The data is transposed so every COLUMN is one signal:
% with a 4-by-N matrix and N == 4, plot would otherwise treat each time step
% as a separate line.
plot(obs.Time, obsData(1:4, :).', '-o');
title('Observation over Time');
xlabel('Time (s)');
ylabel('Observation');
% Note: with the default TeX interpreter the "_X"/"_Y" suffixes are rendered
% as subscripts.
legend('Drone_X', 'Drone_Y', 'Target_X', 'Target_Y');

sgtitle('Reward and Observation Plots');



%% To visualize the Training history of agent after training

% Replays logged training frames: row 1 of TrainingHistory holds the state
% vector of each frame, row 2 holds a map index pair (or is empty when the
% map did not change for that frame).
%
% Use a TrainingHistory variable already present in the workspace; otherwise
% fall back to the one stored on the environment object.
% NOTE(review): the fallback assumes env exposes a TrainingHistory property,
% as the assignment that used to be commented out here suggests - confirm.
if ~exist('TrainingHistory', 'var')
    TrainingHistory = env.TrainingHistory;
end

% NOTE(review): TurinMaps and DensityMaps are not defined in this script and
% must already exist in the workspace as cell arrays - confirm their source.

figure;
axis([1,200, 1,200, 0,20]);
hold on

% Markers for the drone (red) and the goal (green); their positions are
% updated in place on every frame.
dronePlot = plot3(0, 0, 0, 'ro', 'MarkerSize', 10, 'MarkerFaceColor', 'r');
goalPlot = plot3(0, 0, 0, 'go', 'MarkerSize', 10, 'MarkerFaceColor', 'g');


obstacleHandles = {};      % graphics objects belonging to the current map
pathHandle = [];           % reserved for the (disabled) optimal-path overlay

firstFrame = 45000;        % choose the first history column to replay
lastFrame = size(TrainingHistory, 2);

for i = firstFrame:lastFrame
    % Brace indexing returns the cell contents directly.
    currentState = TrainingHistory{1, i};
    Buildings = TrainingHistory{2, i};

    % optimalpath = TrainingHistory{3, i};
    %
    % Ray_Path = TrainingHistory{4, i};
    %
    % SPL = TrainingHistory{5, i};


    if ~isempty(Buildings)
        % A new map starts here: remove everything drawn for the previous one.
        if ~isempty(obstacleHandles)
            for j = 1:numel(obstacleHandles)
                if ishandle(obstacleHandles{j})
                    delete(obstacleHandles{j});
                end
            end
            obstacleHandles = {};
            delete(pathHandle);
            pathHandle = [];
        end


        thismap = TurinMaps{Buildings(1), Buildings(2)};
        thisdensity = DensityMaps{Buildings(1), Buildings(2)};

        % Obstacles are rendered as the 0.5 isosurface of the map volume.
        isoSurfaceObj = isosurface(thismap, 0.5);
        obstacleHandles{end+1} = patch(isoSurfaceObj, 'FaceColor', 'blue', 'EdgeColor', 'k');

        % Keep the image handle with the other per-map graphics so it is
        % deleted on the next map change; otherwise a new image object would
        % accumulate in the axes every time the map changes.
        obstacleHandles{end+1} = imagesc(thisdensity);
        colormap(parula);
        colorbar;

        % pathHandle(end+1) = plot(optimalpath(:,2), optimalpath(:,1), 'r', 'LineWidth', 2); %plot the optimal path

    end

    % State layout used here: elements 1-2 position the drone marker,
    % elements 3-4 position the goal marker; both are drawn at z = 20.
    set(dronePlot, 'XData', currentState(1), 'YData', currentState(2), 'ZData', 20);
    set(goalPlot, 'XData', currentState(3), 'YData', currentState(4), 'ZData', 20);


    drawnow;
    pause(0.042);

end


